# Load the per-game NFL player statistics from a local CSV export.
# Data Source: https://www.advancedsportsanalytics.com/nfl-raw-data
nfl_data <- read.csv(
  file = "C:\\Users\\Sean\\OneDrive\\Desktop\\Grad School\\Machine Learning\\Module 2 - Intro to R\\nfl_19to22data.csv"
)
# Echo the loaded data frame to the output.
nfl_data
The dataset contains per-game passing, rushing, and receiving statistics for NFL players from 2019 through 2022.
# Summarise two character columns and two numeric columns side by side.
summary(nfl_data[c("player", "team", "pass_yds", "pass_td")])
## player team pass_yds pass_td
## Length:19973 Length:19973 Min. : -2.00 Min. :0.0000
## Class :character Class :character 1st Qu.: 0.00 1st Qu.:0.0000
## Mode :character Mode :character Median : 0.00 Median :0.0000
## Mean : 20.57 Mean :0.1317
## 3rd Qu.: 0.00 3rd Qu.:0.0000
## Max. :525.00 Max. :5.0000
Qualitative variables: ‘player’ and ‘team’. Quantitative variables: ‘pass_yds’ and ‘pass_td’.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Replace pass_cmp (a count of completions) with the completion ratio,
# i.e. completions divided by attempts.
# NOTE: this overwrites pass_cmp in place, so running this chunk a second time
# would divide the ratio by pass_att again; reload the data before re-running.
nfl_data <- nfl_data %>%
  mutate(pass_cmp = pass_cmp / pass_att)

# Preview the first rows, then check the distribution of the new ratio.
head(nfl_data)
summary(nfl_data[["pass_cmp"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 0.571 0.652 0.636 0.724 1.000 17963
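Redefined the variable pass_cmp as the completion ratio (passes completed divided by passes attempted) instead of the total number of passes completed. The 17,963 NA's in the summary are rows where this ratio is undefined, presumably because the player attempted no passes in that game.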
# Quick visual check of the two yardage columns. Given a single numeric
# vector, plot() draws each value against its position (row index) in the
# data frame, so the x-axis is the row number rather than another variable.
# The y-axis label is taken from the argument expression as written.
plot(nfl_data$rush_yds)
plot(nfl_data$pass_yds)
Created index plots (value versus row number) of rushing yards and passing yards, with one point per player-game record.